***Please add to publications “We acknowledge the analytical contributions of the CU Boulder Earth Systems Stable Isotope Lab (CUBES-SIL) Core Facility (RRID:SCR_019300)”
Methods:Run Details:
Runs went well, this is a combination of data
from Run2 (April 10) and Run3 (April 17)
Precision on the isotope
scale corrections were +/- 0.192 permil (d13C, “offset” corr). All
analyses (samples = sa, standards = st) culled are listed below (see
output file for details on culling):
d13C.error.S = 0.139
We also ran a Certified Reference Material for seawater DIC concentration ( https://www.ncei.noaa.gov/access/ocean-carbon-acidification-data-system/oceans/Dickson_CRM/batches.html , batch 157 ) with known [DIC] = 2049.43 uM. Our instrument+corrections/calibration gave (n=2) mean [DIC] = 2566 uM (corresponding to mean peak area = 47 Vs for 1ml CO2 ref samples; this is higher than normal, and there was also indication of air contamination in the chromatography); d13C mean = -0.35‰ (the d13C results are substantially off compared to the 16-lab comparison measured mean value of 0.88 +/- 0.06 from the same Certified Reference Material batch https://aslopubs.onlinelibrary.wiley.com/doi/full/10.1002/lom3.10300).
Three ways to consider [DIC] errors include: First, using the
calibration curve generated by the standards, errors are estimated as
95% confidence interval based on the Standard Error term
from the inverse.predict function (the predictive power of the linear
model) - these had a mean error of 211 uM reflecting the noise around
the calibration curve, this drops if some of the lin.std farthest from
the linear model are removed. Second, consider the standard deviation of
the 5 seawater samples (41 uM), finally run triplicates of a few
representative samples.
Sample Details: Problem standards (do not use or use with
caution) (all samples were good, or so small they are clearly below LOQ
and ended up in the culled data set):
DO NOT USE
NA for
isotopes
These standards too big and auto dilution kicked in (ok
for isotopes) but not useful for [DIC] “26752”, “26751”, “26801”,
“26802”
#devtools::install_github("isoverse/isoprocessor")
library(isoprocessor) # processing isotope data files
## Loading required package: isoreader
##
## Attaching package: 'isoreader'
## The following object is masked from 'package:stats':
##
## filter
##
## Attaching package: 'isoprocessor'
## The following object is masked from 'package:stats':
##
## filter
library(tidyverse) # dplyr, tidyr, ggplot, etc.
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks isoprocessor::filter(), isoreader::filter(), stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openxlsx) # read/write from excel
library(plotly) # interactive plots
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(isoreader) # reading isotope data files
library(chemCal) # calculations from standard curve
library(isoprocessCUBES) #if loading this package for the first time, be sure to install the "devtools" package first, then install the isoprocessCUBES package: devtools::install_github("CUBESSIL/isoprocessCUBES")
library(gridExtra)
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
library(broom)
library(stargazer)
##
## Please cite as:
##
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(kableExtra) #(requires webshot::install_phantomjs() install.packages("magick") install.packages("webshot"))
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(png)
library(grid)
# --- Session setup ---------------------------------------------------------
rawfiles.directory<- file.path(".") #enter the name of your raw data folder here; should be in the same directory as your .Rproj file
session<-"DIC 1ml run" #update this to name your own session
Run <- "1"
# Read all continuous-flow raw data files and extract the vendor data table.
# Use TRUE rather than T for logical arguments: T is an ordinary (reassignable)
# variable, TRUE is a reserved word.
dxf_files<-iso_read_continuous_flow(rawfiles.directory, read_vendor_data_table = TRUE, quiet = TRUE) #reads in raw data files and extracts necessary data
# Drop any files that errored during reading. iso_omit_files_with_problems()
# was deprecated in isoreader 1.3.0 and renamed; the replacement has the same
# default behavior (remove files with errors, keep the rest).
dxf_files <- iso_filter_files_with_problems(dxf_files)
## Info: removing 0/72 files that have any error (keeping 72)
# Flatten the vendor data table (one row per detected peak) plus all file
# metadata into a single data frame for downstream processing.
dxf_data <- iso_get_vendor_data_table(dxf_files, include_file_info = everything()) #converts necessary data into usable data frame
## Info: aggregating vendor data table from 72 data file(s), including file info 'everything()'
raw.data <- dxf_data %>%
# exclude flush gas; change to check gas if that is the term used
filter(`Identifier 2` !="Flush Gas") %>%
#changes column names to match input file headers; adjust as needed if someone puts data in different column than normal in the sequence file
rename(row = Row,
mass = Comment,
Identifier1 = `Identifier 1`,
type = `Identifier 2`,
d13Craw = `d 13C/12C`,
d18Oraw = `d 18O/16O`,
Ampl44 = `Ampl 44`,
Area44 = `Intensity 44`) %>%
#changes mass data type to numeric data instead of character
mutate(mass = as.numeric(mass), row = as.numeric(row)) %>%
#if needed, make specific changes in individual cells; comment out if not needed; this example removes a space of some of the entries for "sample" so they are all the same
mutate(#type = str_replace(type, "sampled", "mon.std"),
# Analyses 20302-20304 are reassigned to specific standards and roles here —
# presumably mislabeled in the sequence file (TODO confirm against run sheet).
`Identifier1` = ifelse(`Identifier1`== "YULE", "CU YULE", `Identifier1`),
Identifier1 = ifelse(Analysis== 20302, "CU YULE", Identifier1),
Identifier1 = ifelse(Analysis== 20303, "IAEA-603", Identifier1),
Identifier1 = ifelse(Analysis== 20304, "Merck", Identifier1),
type = ifelse(Analysis== 20302, "drift.std", type),
type = ifelse(Analysis== 20303, "mon.std", type),
type = ifelse(Analysis== 20304, "dis2.std", type)
)
# Another way to correct mislabeled entries or typos: per-analysis fixes for
# name, mass, and standard type. Two swapped standards were identified from
# their isotope values and adjacent run positions.
# Each fix is (Analysis number, column, replacement value); applying them in
# one loop keeps all corrections in a single, easy-to-audit table.
manual.fixes <- list(
  list(26804, "Identifier1", "seawater standard"),
  list(26803, "Identifier1", "seawater standard"),
  list(26750, "Identifier1", "CU YULE"),
  list(26750, "mass", 159),
  list(26750, "type", "mon.std"),
  list(26751, "mass", 994),
  list(26752, "mass", 605),
  list(26753, "mass", 487),
  list(26754, "mass", 301),
  list(26755, "mass", 266),
  list(26756, "mass", 197),
  list(26757, "Identifier1", "IAEA-603"),
  list(26757, "mass", 121),
  list(26757, "type", "lin.std"),
  list(26789, "type", "dis.std"), # change IAEA C2 to dis.std
  list(26790, "type", "dis.std"),
  list(26791, "type", "dis.std"),
  list(26795, "type", "dis.std"),
  list(26796, "type", "dis.std"),
  list(26797, "type", "dis.std"),
  list(26798, "type", "dis.std"),
  list(26799, "type", "dis.std"),
  list(26800, "type", "dis.std"),
  list(26801, "type", "dis.std"),
  list(26802, "type", "dis.std")
)
for (fix in manual.fixes) {
  raw.data[raw.data$Analysis == fix[[1]], fix[[2]]] <- fix[[3]]
}
#input all standard values in PDB mineral values; be sure to adjust for YOUR standards
Standards <- readRDS(url("https://github.com/cubessil/clumpsDRcodes/blob/master/Standards.RDS?raw=true"))
mon.std<-"CU YULE" #enter your standard names here
corr.std<-"IAEA-603"
dis.std<-"IAEA-C2"
dis2.std<-"USGS44"
# Accepted d13C/d18O values for just the standards used in this run.
Standardsforthisrun <- Standards %>% select(Sample.ID, d13C,d18O) %>% filter(Sample.ID %in% c(corr.std, mon.std, dis.std, dis2.std)) %>%
rename("std.name"= Sample.ID,
"C.acc" = d13C,
"O.acc" = d18O)
C.stds.table <- Standardsforthisrun %>% select(std.name, C.acc)
O.stds.table <- Standardsforthisrun %>% select(std.name, O.acc)
desiredorder <- c("IAEA-603", "CU YULE", "IAEA-C2","USGS44")
# Order rows to match the role labels assigned below. match() is the
# vectorized idiom replacing sapply(std.name, function(y) which(y == desiredorder)).
C.stds.table <-C.stds.table %>% arrange(match(std.name, desiredorder))
C.stds.table$type <- c("drift.std", "mon.std", "dis.std", "dis2.std")
O.stds.table <-O.stds.table %>% arrange(match(std.name, desiredorder))
O.stds.table$type <- c("drift.std", "mon.std", "dis.std", "dis2.std")
# Downstream correction code references scalar C.acc / O.acc (accepted values
# of the correction standard) rather than reading them from the tables, so
# extract them here. NOTE(review): not ideal — the corrections should look
# these up from C.stds.table/O.stds.table directly.
C.acc <- C.stds.table %>% filter(std.name == corr.std)
C.acc <- C.acc$C.acc
O.acc <- O.stds.table %>% filter(std.name == corr.std)
O.acc <- O.acc$O.acc
# Collapse per-peak rows into one row per analysis (mean/sd over the sample
# peaks). Nr. > 6 drops the leading peaks — presumably the CO2 reference-gas
# pulses at the start of each acquisition; TODO confirm for this method.
data <-
raw.data %>%
filter(Nr. > 6) %>%
group_by(row, file_id, Analysis, Identifier1, mass, type) %>%
summarize(
num.peaks=n(),
d13C.raw=mean(as.numeric(d13Craw)),
d13C.sd=sd(d13Craw),
d18O.raw.SMOW=mean(as.numeric(d18Oraw)),
d18O.sd=sd(d18Oraw),
ampl44=mean(as.numeric(Ampl44)),
ampl44.sd=sd(as.numeric(Ampl44)),
area44=mean(as.numeric(Area44)),
area44.sd=sd(Area44),
inv.area44=1/area44
) %>% mutate(
# bookkeeping columns, filled in by later culling steps
Do_not_use ="",
Why_culled =""
)
## `summarise()` has grouped output by 'row', 'file_id', 'Analysis',
## 'Identifier1', 'mass'. You can override using the `.groups` argument.
# Analyses with fewer than 6 sample peaks go straight to the culled set.
culled.data<- raw.data %>%
group_by(row, file_id, Analysis, Identifier1, mass, type) %>%
summarize(
num.peaks=n()) %>%
filter (num.peaks<6)
## `summarise()` has grouped output by 'row', 'file_id', 'Analysis',
## 'Identifier1', 'mass'. You can override using the `.groups` argument.
# Single-peak analyses have sd = NA; cull them as well.
culled.data <- bind_rows(culled.data, filter(data, num.peaks==1))
data$d18O.raw<-(data$d18O.raw.SMOW-41.43)/1.04143 #d18O output is CO2 SMOW, so need to use the CO2 SMOW to min PDB conversion
# Long format (isotope, stdev) for faceted sd plots; d18O.sd < 100 excludes
# wildly scattered analyses from the plot scale.
stdev<-gather(filter(data, num.peaks>1, d18O.sd<100), key = "isotope",
value= "stdev", d13C.sd, d18O.sd)
Identify outliers: Look at standard deviations of the stable isotope values of the individual peaks (typically there are 10 peaks). Often the “cutoff” of outliers tends to be sd of 0.075 - 0.1 permil, and coincides with small peak areas
# Per-analysis sd of replicate peaks vs. run position, colored by sample.
# guides(fill = FALSE) was deprecated in ggplot2 3.3.4; use "none" instead.
sd.values<-ggplot(stdev, aes(x=row, y=stdev, fill=Identifier1)) +
geom_point(size=3, shape=21) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
guides(fill="none") +
facet_grid(. ~ isotope)
# Distribution of peak-to-peak sd per isotope.
sd.hist<-ggplot(stdev, aes(x=stdev, fill=isotope)) +
geom_histogram(binwidth=.01) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
guides(fill="none") +
facet_grid(. ~ isotope)
sd.box<-ggplot(stdev, aes(x=isotope, y=stdev, fill=isotope)) +
geom_boxplot()
# sd vs. peak area: outliers typically coincide with small peaks.
sd.v.area44<-ggplot(stdev, aes(x=area44, y=stdev, fill=factor(num.peaks))) +
geom_point(size=3, shape=21) +
scale_fill_discrete() +
facet_grid(. ~ isotope)
multiplot(sd.values, sd.hist, sd.box, sd.v.area44, cols=1)
Remove major outliers based on large standard deviation of peaks, then redo plots - also, adds culled data to “culled data” tab that shows samples and standards that shouldn’t be used
#adjust the next line only: change the value of d18O.sd.cutoff to match the cutoff of outliers based on the previous plots; default is 0.1
d18O.sd.cutoff <- 0.5
# Move high-sd analyses into the culled set; keep the rest for corrections.
culled.data <- bind_rows(culled.data, subset(data, d18O.sd>d18O.sd.cutoff))
wo.culled <- subset(data, d18O.sd<d18O.sd.cutoff)
stds1<- subset(data, type!="sample" & d18O.sd<d18O.sd.cutoff)
stdev<-gather(wo.culled, key = "isotope",
value= "stdev", d13C.sd, d18O.sd)
# Redo the sd diagnostic plots on the culled dataset.
# guides(fill = FALSE) was deprecated in ggplot2 3.3.4; use "none" instead.
sd.values<-ggplot(stdev, aes(x=row, y=stdev, fill=Identifier1)) +
geom_point(size=3, shape=21) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
guides(fill="none") +
facet_grid(. ~ isotope)
sd.hist<-ggplot(stdev, aes(x=stdev, fill=isotope)) +
geom_histogram(binwidth=.005) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
guides(fill="none") +
facet_grid(. ~ isotope)
sd.box<-ggplot(stdev, aes(x=isotope, y=stdev, fill=isotope)) +
geom_boxplot()
sd.v.area44<-ggplot(stdev, aes(x=area44, y=stdev, fill=factor(num.peaks))) +
geom_point(size=3, shape=21) +
scale_fill_discrete() +
facet_grid(. ~ isotope)
multiplot(sd.values, sd.hist, sd.v.area44, sd.box, cols=1)
Plot yields of all samples and standards, as well as just the standards, using INTERACTIVE plots. Look for major outliers that coincide with yield problems. Note that the linear model in the yield.stds figure here is based on ALL the standard data
Check to see that samples all fall within linearity range, and for any other obvious outliers: * do linearity standards cover the area space of your samples and other standards? * do all the standards show typical trends between area and mass? Do you see very general trends like that in your samples (sample sets with wide ranges in weight percent carbonate will not show a strong relationship)?
# adjust next line only: change #'s in stds.to.cull to reflect the Analysis #'s that need to be culled, add #'s as needed and rerun after looking at the new plots
stds.to.cull <- c(1) #26754,26755,26756,26750
# #Analysis #s with yield problem and/or significant outlier in d13C or d18O, including samples smaller than linearity range that weren't caught by other culling steps
stds.culled <- filter(stds1, Analysis %in% stds.to.cull)
stds <- filter(stds1, !Analysis %in% stds.to.cull)
culled.data<-bind_rows(culled.data, stds.culled)
# Append the 95% confidence-interval columns (fit/lwr/upr) of the area~mass
# fit to the standards table for the yield plot.
stds<- data.frame(stds, cbind(predict.lm(lm(stds$area44 ~ stds$mass), interval=c("confidence"), level=0.95)))
# Yield (peak area vs. weighed mass) for all analyses...
yield.all<-ggplot(data, aes(x=mass, y=area44, fill=type, label=Analysis)) +
geom_point(size=3, shape=22) +
theme_bw() +
labs(title= "all data")
# ...and for the standards only, with a linear fit through ALL standards.
yield.stds<-ggplot(stds, aes(x=mass, y=area44, label=Analysis)) +
stat_smooth(method="lm") +
geom_point(aes(fill=type, shape=type), size=2) +
theme_bw() +
scale_shape_manual(values=c(21,22,23,24,25)) +
labs(title= "standards - yield")
# Passing a function as a layer's `data` makes ggplot apply it to the plot
# data (stds), so these helpers compute per-type mean/uncertainty lines.
# calc_means is not defined in this file — presumably from isoprocessCUBES;
# TODO confirm its return columns (yintercept, type, linetype).
calc_std_means_d13C <- function(df) calc_means(df, "d13C.raw")
d13C.stds <-
ggplot(stds, aes(label=Analysis)) +
geom_hline(
data = calc_std_means_d13C,
mapping = aes(yintercept = yintercept, color = type, linetype = linetype)) +
geom_point(shape=21, mapping = aes(x=area44, y=d13C.raw, fill=type)) +
scale_linetype_manual(values = c(1, 3, 2, 3, 2)) +
facet_grid(type ~ ., scales = "free") +
theme_bw() +
labs(title= "d13C standards - means and uncertainties")
calc_std_means_d18O <- function(df) calc_means(df, "d18O.raw")
d18O.stds <-
ggplot(stds, aes(label=Analysis)) +
geom_hline(
data = calc_std_means_d18O,
mapping = aes(yintercept = yintercept, color = type, linetype = linetype)) +
geom_point(shape=21, mapping = aes(x=area44, y=d18O.raw, fill=type)) +
scale_linetype_manual(values = c(1, 3, 2, 3, 2)) +
facet_grid(type ~ ., scales = "free") +
# NOTE(review): theme_bw() below replaces the complete theme, so this
# legend.position = "none" has no effect; swap the order to hide the legend.
theme(legend.position = "none") +
theme_bw() +
labs(title= "d18O standards - means and uncertainties")
# Interactive versions for hover-identifying outlier analyses.
ggplotly(yield.all)
ggplotly(yield.stds)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
ggplotly(d18O.stds)
ggplotly(d13C.stds)
Makes plots of individual analysis files for analyses in the culled.data dataframe - use to check peak sizes, signs of atmospheric peaks, and other irregularities
# Plot the mass-44 traces of every culled analysis to check peak sizes,
# atmospheric (air) peaks, and other chromatographic irregularities.
dxf_files %>%
iso_filter_files(file_id %in% culled.data$file_id) %>%
iso_plot_raw_data(data = c(44), panel = "file", color = "data")
## Info: applying file filter, keeping 6 of 72 files
## Info: plotting data from 6 data file(s)
#If there are any additional chromatograms needed, just input the analysis numbers into the more.files list, replacing data$Analysis[5] with analysis number(s) - for example c(9536, 9655) instead of c(data$Analysis[5]). data$Analysis[5] is just acting as a stand-in to prevent errors if no additional analyses are selected
#more.files <- c(data$Analysis[5])
more.files <- c(26760, 26787)
plot.more <- filter(data, Analysis %in% more.files)
# Interactive chromatograms for the extra analyses selected above.
dxf_files %>%
iso_filter_files(file_id %in% plot.more$file_id) %>%
iso_plot_raw_data(data = c(44), panel = "file", color = "data") %>%
ggplotly()
## Info: applying file filter, keeping 2 of 72 files
## Info: plotting data from 2 data file(s)
# Fit mass as a function of peak area across all standards; used below to
# estimate carbonate content of each sample from its measured area.
yield.line <- lm(stds$mass ~ stds$area44)
(yield.slope <- coef(yield.line)[[2]])
## [1] 5.517157
(yield.intercept <- coef(yield.line)[[1]])
## [1] 65.76171
# Predicted mass from area, as a percent of the weighed mass.
data$PercentCO3 <- ((yield.slope * data$area44 + yield.intercept)/data$mass *100)
# Suggested weigh-out to hit a 90 ug target at the estimated percent
# carbonate — presumably the method's optimal CO3 mass; TODO confirm.
data$target.wgt.ug <- 90/(data$PercentCO3/100)
raw.corr <- filter(stds, Identifier1==corr.std)
raw.mon <- filter(stds, type=="mon.std")
raw.dis <- filter(stds, type=="dis.std")
raw.dis2 <- filter(stds, type=="dis2.std")
# Fill the summary tables; row order must match C.stds.table/O.stds.table
# (corr, mon, dis, dis2) as arranged earlier.
C.stds.table$rawC.mean <- c(mean(raw.corr$d13C.raw), mean(raw.mon$d13C.raw), mean(raw.dis$d13C.raw), mean(raw.dis2$d13C.raw))
C.stds.table$rawC.sd <- c(sd(raw.corr$d13C.raw), sd(raw.mon$d13C.raw), sd(raw.dis$d13C.raw), sd(raw.dis2$d13C.raw))
C.stds.table$rawC.n <- c(length(raw.corr$d13C.raw), length(raw.mon$d13C.raw), length(raw.dis$d13C.raw), length(raw.dis2$d13C.raw))
O.stds.table$rawO.mean <- c(mean(raw.corr$d18O.raw), mean(raw.mon$d18O.raw), mean(raw.dis$d18O.raw), mean(raw.dis2$d18O.raw))
O.stds.table$rawO.sd <- c(sd(raw.corr$d18O.raw), sd(raw.mon$d18O.raw), sd(raw.dis$d18O.raw), sd(raw.dis2$d18O.raw))
O.stds.table$rawO.n <- c(length(raw.corr$d18O.raw), length(raw.mon$d18O.raw), length(raw.dis$d18O.raw), length(raw.dis2$d18O.raw))
Evaluate average offset and standard deviation of combined linearity and drift standards. Does NOT include linearity or drift correction to data
# --- Offset correction: single additive shift based on corr.std ------------
offsetC<-subset(stds, Identifier1==corr.std)
(offsetC.mean<-mean(offsetC$d13C.raw))
## [1] 2.47304
(offsetC.sd<-sd(offsetC$d13C.raw))
## [1] 0.1022045
# Shift the correction standard so its mean equals the accepted value; an
# additive offset leaves the sd unchanged.
offsetC$d13C.offset <- offsetC$d13C.raw + (C.acc - offsetC.mean)
(offsetcorrC.mean<-mean(offsetC$d13C.offset))
## [1] 2.46
(offsetcorrC.sd<-sd(offsetC$d13C.offset))
## [1] 0.1022045
d13C.offset<-ggplot(offsetC, aes(x=area44, y=d13C.offset, shape=type)) +
geom_point(fill="orange", size=3) +
geom_hline(yintercept=offsetcorrC.mean, colour="orange") +
geom_hline(yintercept=offsetcorrC.mean + offsetcorrC.sd, colour="orange", linetype="dashed") +
geom_hline(yintercept=offsetcorrC.mean - offsetcorrC.sd, colour="orange", linetype="dashed") +
geom_hline(yintercept=offsetcorrC.mean + 2*offsetcorrC.sd, colour="orange", linetype=3) +
geom_hline(yintercept=offsetcorrC.mean - 2*offsetcorrC.sd, colour="orange", linetype=3) +
scale_shape_manual(values=c(21,22,23,24,25)) +
# FIX: sprintf("%.2f", x) takes one value; the stray trailing `2` argument
# only produced "argument not used by format" warnings and is removed.
annotate("text", y = offsetcorrC.mean + 0.01, x = min(offsetC$area44),
label = paste0("mean: ", sprintf("%.2f", offsetcorrC.mean), " \U00B1 ", sprintf("%.2f", offsetcorrC.sd), " \U2030 (1 sd)"),
size = 4, hjust=0, vjust=0, parse=FALSE, colour="black") +
theme_bw()
# Where the correction standards sit in mass-area space relative to all stds.
d13C.offset.mass<-ggplot (stds, aes(x=mass, y=area44)) +
stat_smooth(method="lm") +
geom_point(data=offsetC, aes(x=mass, y=area44), shape=21, fill="orange", size = 2) +
theme_bw()
multiplot(d13C.offset, d13C.offset.mass, cols=2)
## `geom_smooth()` using formula = 'y ~ x'
Assess effects: Apply offset correction to the whole dataset of raw values, and check the monitoring standards
#apply offset correction to whole dataset
data$d13C.offset <- data$d13C.raw + (C.acc - offsetC.mean)
stds$d13C.offset <- stds$d13C.raw + (C.acc - offsetC.mean)
#make monitoring standard dataset and dataset for additional standards used later for discrimination correction
offsetC.mon <- subset(stds, Identifier1==mon.std)
offsetC.dis <- subset(stds, Identifier1==dis.std)
offsetC.dis2 <- subset(stds, Identifier1==dis2.std)
#check monitoring standard response
(offsetC.mon.mean<-mean(offsetC.mon$d13C.offset))
## [1] -3.127191
(offsetC.mon.sd<-sd(offsetC.mon$d13C.offset))
## [1] 0.09312972
# Summary rows follow table order: corr, mon, dis, dis2.
C.stds.table$offsetC.mean <- c(offsetcorrC.mean, offsetC.mon.mean, mean(offsetC.dis$d13C.offset), mean(offsetC.dis2$d13C.offset))
C.stds.table$offsetC.sd <- c(offsetcorrC.sd, offsetC.mon.sd, sd(offsetC.dis$d13C.offset), sd(offsetC.dis2$d13C.offset))
C.mon.offset<-ggplot(offsetC.mon, aes(x=area44, y=d13C.offset)) +
geom_point(shape=21, fill="orange") +
geom_hline(yintercept=offsetC.mon.mean, colour="orange") +
geom_hline(yintercept=offsetC.mon.mean + offsetC.mon.sd, colour="orange", linetype="dashed") +
geom_hline(yintercept=offsetC.mon.mean - offsetC.mon.sd, colour="orange", linetype="dashed") +
geom_hline(yintercept=offsetC.mon.mean + 2*offsetC.mon.sd, colour="orange", linetype=3) +
geom_hline(yintercept=offsetC.mon.mean - 2*offsetC.mon.sd, colour="orange", linetype=3) +
# FIX: removed stray third argument to sprintf() ("%.2f" uses one value).
annotate("text", y = offsetC.mon.mean +0.01, x = min(offsetC.mon$area44), label = paste0("mean: ", sprintf("%.2f", offsetC.mon.mean), " \U00B1 ", sprintf("%.2f", offsetC.mon.sd), " \U2030 (1 sd)"), size = 4, hjust=0, vjust=0, parse=FALSE) +
theme_bw()
C.mon.offset.mass<-ggplot (stds, aes(x=mass, y=area44)) +
stat_smooth(method="lm") +
geom_point(data=offsetC.mon, aes(x=mass, y=area44), shape=21, fill="orange", size = 2) +
theme_bw()
multiplot(C.mon.offset, C.mon.offset.mass, cols=2)
## `geom_smooth()` using formula = 'y ~ x'
Using the raw values, assess drift in isotope values throughout the run
# --- Drift correction: linear trend of drift.std vs. run position ----------
driftC<-subset(stds, type=="drift.std")
# Fit the model once and reuse it, instead of refitting for each coefficient.
drift.lmC <- lm(as.numeric(driftC$d13C.raw) ~ driftC$row)
drift.slopeC<-coef(drift.lmC)[[2]]
drift.interC<-coef(drift.lmC)[[1]]
#drift check: remove the fitted trend, re-centering on the accepted value
driftC$d13C.drift<- driftC$d13C.raw + (C.acc - (drift.slopeC * driftC$row + drift.interC))
(driftC.mean<-mean(driftC$d13C.drift))
## [1] 2.46
(driftC.sd<-sd(driftC$d13C.drift))
## [1] 0.06411247
C.drift<-ggplot(driftC, aes(x=row, y=d13C.raw)) +
geom_smooth(method=lm, colour="black") +
annotate("text", x = min(driftC$row), y = max(driftC$d13C.raw + 0.01), label = lm_eqn(driftC$row, driftC$d13C.raw), size = 4, hjust=0, vjust=0, parse=TRUE, colour="black") +
geom_point(shape=21, fill="black", size=2) +
geom_point(aes(x=row, y=d13C.drift), fill="red", shape=21, size=2) +
# linewidth replaces the line `size` aesthetic (deprecated in ggplot2 3.4.0)
geom_hline(aes(yintercept=C.acc), linewidth=.5) +
geom_hline(yintercept = driftC.mean + driftC.sd, colour="red", linetype="dashed") +
geom_hline(yintercept = driftC.mean - driftC.sd, colour="red", linetype="dashed") +
geom_hline(yintercept = driftC.mean + 2*driftC.sd, colour="red", linetype=3) +
geom_hline(yintercept = driftC.mean - 2*driftC.sd, colour="red", linetype=3) +
# FIX: removed stray third argument to sprintf() ("%.2f" uses one value).
annotate("text",
y = driftC.mean +0.01,
x = min(driftC$row),
label = paste0("mean: ", sprintf("%.2f", driftC.mean), " \U00B1 ", sprintf("%.2f", driftC.sd), " \U2030 (1 sd)"),
size = 4, hjust=0, vjust=0, parse=FALSE) +
theme_bw()
C.drift.mass<-ggplot(stds, aes(x=mass, y=area44)) +
stat_smooth(method="lm") +
geom_point(data=driftC, aes(x=mass, y=area44), shape=21, fill="red", size=3) +
theme_bw()
multiplot(C.drift, C.drift.mass, cols=2)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
Assess effects: Apply drift correction to the whole dataset of raw values, and check the monitoring standards
# Apply the drift correction to all data and standards.
data$d13C.drift <- data$d13C.raw + (C.acc - (drift.slopeC * data$row + drift.interC))
stds$d13C.drift <- stds$d13C.raw + (C.acc - (drift.slopeC * stds$row + drift.interC))
driftC.mon<-subset(stds, Identifier1==mon.std)
driftC.dis <- subset(stds, Identifier1==dis.std)
driftC.dis2 <- subset(stds, Identifier1==dis2.std)
(driftC.mon.mean<-mean(driftC.mon$d13C.drift))
## [1] -3.119543
(driftC.mon.sd<-sd(driftC.mon$d13C.drift))
## [1] 0.1154887
# Summary rows follow table order: corr, mon, dis, dis2.
C.stds.table$driftC.mean <- c(driftC.mean, driftC.mon.mean, mean(driftC.dis$d13C.drift),mean(driftC.dis2$d13C.drift))
C.stds.table$driftC.sd <- c(driftC.sd, driftC.mon.sd, sd(driftC.dis$d13C.drift),sd(driftC.dis2$d13C.drift))
C.mon.drift<-ggplot(driftC.mon, aes(x=area44, y=d13C.drift)) +
geom_point(shape=21, fill="red") +
geom_hline(yintercept = driftC.mon.mean, colour="red") +
geom_hline(yintercept = driftC.mon.mean + driftC.mon.sd, colour="red", linetype="dashed") +
geom_hline(yintercept = driftC.mon.mean - driftC.mon.sd, colour="red", linetype="dashed") +
geom_hline(yintercept = driftC.mon.mean + 2*driftC.mon.sd, colour="red", linetype=3) +
geom_hline(yintercept = driftC.mon.mean - 2*driftC.mon.sd, colour="red", linetype=3) +
# FIX: removed stray third argument to sprintf() ("%.2f" uses one value).
annotate("text",
y = driftC.mon.mean +0.01,
x = min(driftC.mon$area44),
label = paste0("mean: ", sprintf("%.2f", driftC.mon.mean), " \U00B1 ", sprintf("%.2f", driftC.mon.sd), " \U2030 (1 sd)"),
size = 4, hjust=0, vjust=0, parse=FALSE, colour="red")
C.mon.drift.mass<-ggplot (stds, aes(x=mass, y=area44)) +
stat_smooth(method="lm") +
geom_point(data=driftC.mon, aes(x=mass, y=area44), shape=21, fill="red", size = 2) +
theme_bw()
multiplot(C.mon.drift, C.mon.drift.mass, cols=2)
## `geom_smooth()` using formula = 'y ~ x'
Using the raw values, assess linearity (dependence of isotope values on peak size) throughout the run
# --- Linearity correction: d13C dependence on 1/area of lin.std ------------
linC<-subset(stds, type=="lin.std")
# Fit the model once and reuse it, instead of refitting for each coefficient.
lin.lmC <- lm(linC$d13C.raw ~ linC$inv.area44)
lin.slopeC<-coef(lin.lmC)[[2]]
lin.interC<-coef(lin.lmC)[[1]]
#linearity check: remove the fitted 1/area dependence, re-centering on C.acc
linC$d13C.lin<-linC$d13C.raw + (C.acc - (lin.slopeC * linC$inv.area44 + lin.interC))
(linC.mean<-mean(linC$d13C.lin))
## [1] 2.46
(linC.sd<-sd(linC$d13C.lin))
## [1] 0.03306552
C.lin.area44<-ggplot(linC, aes(x=area44, y=d13C.raw)) +
geom_point(shape=21, fill="blue") +
geom_smooth()
C.lincorr.invarea<-ggplot(linC, aes(x=inv.area44, y=d13C.raw)) +
geom_smooth(method=lm) +
annotate("text", x = min(linC$inv.area44), y = max(linC$d13C.raw + 0.01), label = lm_eqn(linC$inv.area44, linC$d13C.raw), size = 4, hjust=0, vjust=0, parse=TRUE) +
geom_point(shape=21, fill="black", size=2) +
geom_point(aes(x=inv.area44, y=d13C.lin), fill="red", shape=22) +
# linewidth replaces the line `size` aesthetic (deprecated in ggplot2 3.4.0)
geom_hline(aes(yintercept=C.acc), linewidth=.5) +
geom_hline(yintercept = linC.mean + linC.sd, colour="red", linetype="dashed") +
geom_hline(yintercept = linC.mean - linC.sd, colour="red", linetype="dashed") +
geom_hline(yintercept = linC.mean + 2*linC.sd, colour="red", linetype=3) +
geom_hline(yintercept = linC.mean - 2*linC.sd, colour="red", linetype=3) +
# FIX: removed stray third argument to sprintf() ("%.2f" uses one value).
annotate("text",
y = linC.mean +0.01,
x = min(linC$inv.area44),
label = paste0("mean: ", sprintf("%.2f", linC.mean), " \U00B1 ", sprintf("%.2f", linC.sd), " \U2030 (1 sd)"),
size = 4, hjust=0, vjust=0, parse=FALSE) +
theme_bw()
C.lin.mass<-ggplot (stds, aes(x=mass, y=area44)) +
stat_smooth(method="lm") +
geom_point(data=linC, aes(x=mass, y=area44), shape=21, fill="blue", size = 2) +
theme_bw()
multiplot(C.lin.area44, C.lincorr.invarea, C.lin.mass, cols=3)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
Assess effects: Apply linearity correction to the whole dataset of raw values, and check the monitoring standards
# Apply the linearity correction to all data and standards.
data$d13C.lin <- data$d13C.raw + (C.acc - (lin.slopeC * data$inv.area44 + lin.interC))
stds$d13C.lin <- stds$d13C.raw + (C.acc - (lin.slopeC * stds$inv.area44 + lin.interC))
linC.mon<-subset(stds, Identifier1==mon.std)
linC.dis<-subset(stds, Identifier1==dis.std)
linC.dis2<-subset(stds, Identifier1==dis2.std)
(linC.mon.mean <- mean(linC.mon$d13C.lin))
## [1] -3.174818
(linC.mon.sd<-sd(linC.mon$d13C.lin))
## [1] 0.08798347
# Summary rows follow table order: corr, mon, dis, dis2.
C.stds.table$linC.mean <- c(linC.mean, linC.mon.mean, mean(linC.dis$d13C.lin),mean(linC.dis2$d13C.lin))
C.stds.table$linC.sd <- c(linC.sd, linC.mon.sd, sd(linC.dis$d13C.lin), sd(linC.dis2$d13C.lin))
C.mon.lin<-ggplot(linC.mon, aes(x=area44, y=d13C.lin)) +
geom_point(shape=21, fill="blue") +
geom_hline(yintercept = linC.mon.mean, colour="blue") +
geom_hline(yintercept = linC.mon.mean + linC.mon.sd, colour="blue", linetype="dashed") +
geom_hline(yintercept = linC.mon.mean - linC.mon.sd, colour="blue", linetype="dashed") +
geom_hline(yintercept = linC.mon.mean + 2*linC.mon.sd, colour="blue", linetype=3) +
geom_hline(yintercept = linC.mon.mean - 2*linC.mon.sd, colour="blue", linetype=3) +
# FIX: removed stray third argument to sprintf() ("%.2f" uses one value).
annotate("text",
y = linC.mon.mean +0.01,
x = min(linC.mon$area44),
label = paste0("mean: ", sprintf("%.2f", linC.mon.mean), " \U00B1 ", sprintf("%.2f", linC.mon.sd), " \U2030 (1 sd)"),
size = 4, hjust=0, vjust=0, parse=FALSE, colour="blue")
C.mon.lin.mass<-ggplot (stds, aes(x=mass, y=area44)) +
stat_smooth(method="lm") +
geom_point(data=linC.mon, aes(x=mass, y=area44), shape=21, fill="blue", size = 2) +
theme_bw()
multiplot(C.mon.lin, C.mon.lin.mass, cols=2)
## `geom_smooth()` using formula = 'y ~ x'
Because a run can be affected by both drift and linearity, here we assess the effects of applying both the linearity and drift corrections to the raw data. This uses the linearity corrected values, and looks at the drift in those values
# Attach the linearity-corrected values to the drift-standard rows so we can
# evaluate the drift remaining after the linearity correction
lindriftC <- merge(driftC, data[c("row", "d13C.lin")], by.x="row", by.y="row", all.x=TRUE, all.y=FALSE, sort=FALSE)
# Fit the drift regression once and reuse its coefficients (the original
# code refit the identical model twice, once per coefficient)
lindrift.fitC <- lm(lindriftC$d13C.lin ~ lindriftC$row)
lindrift.slopeC <- coef(lindrift.fitC)[[2]]
lindrift.interC <- coef(lindrift.fitC)[[1]]
#drift check
# Correct each linearity-corrected value toward the accepted value C.acc
# using the fitted drift line (position in run = `row`)
lindriftC$d13C.lindrift <- lindriftC$d13C.lin + (C.acc - (lindrift.slopeC * lindriftC$row + lindrift.interC))
# fix: the original summarized d13C.drift (the drift-only correction) here,
# which looks like a copy-paste slip -- these stats label the lin+drift plot
# below, so summarize the combined correction d13C.lindrift instead
(lindriftC.mean <- mean(lindriftC$d13C.lindrift))
(lindriftC.sd <- sd(lindriftC$d13C.lindrift))
# Linearity-corrected standards vs run order (black), values after the
# additional drift correction (red squares), the accepted value (solid
# line), and +/-1/2 sd envelopes around the corrected mean
C.lindrift <- ggplot(lindriftC, aes(x=row, y=d13C.lin)) +
  geom_smooth(method=lm, colour="black") +
  annotate("text", x = min(lindriftC$row), y = max(lindriftC$d13C.lin + 0.01), label = lm_eqn(lindriftC$row, lindriftC$d13C.lin), size = 4, hjust=0, vjust=0, parse=TRUE, colour="black") +
  geom_point(shape=21, fill="black", size=2) +
  geom_point(aes(x=row, y=d13C.lindrift), fill="red", shape=22, size=2) +
  geom_hline(aes(yintercept=C.acc), size=.5) +
  geom_hline(yintercept = lindriftC.mean + lindriftC.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = lindriftC.mean - lindriftC.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = lindriftC.mean + 2*lindriftC.sd, colour="red", linetype=3) +
  geom_hline(yintercept = lindriftC.mean - 2*lindriftC.sd, colour="red", linetype=3) +
  annotate("text",
    y = lindriftC.mean +0.01,
    x = min(lindriftC$row),
    # fix: removed the stray extra sprintf() argument that triggered the
    # "one argument not used by format '%.2f'" warning
    label = paste0("mean: ", sprintf("%.2f", lindriftC.mean), " \U00B1 ", sprintf("%.2f", lindriftC.sd), " \U2030 (1 sd)"),
    size = 4, hjust=0, vjust=0, parse=FALSE) +
  theme_bw()
# Drift standards (red points) on the mass-vs-area fit of all standards
C.lindrift.mass<-ggplot(stds, aes(x=mass, y=area44)) +
stat_smooth(method="lm") +
geom_point(data=lindriftC, aes(x=mass, y=area44), shape=21, fill="red", size=3) +
theme_bw()
# Side-by-side panels: lin+drift assessment, and mass vs area
multiplot(C.lindrift, C.lindrift.mass, cols=2)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
Assess effects: Apply drift correction to the whole dataset of linearity corrected values, and check the monitoring standards
# Apply the drift correction (fit above on the drift standards) to the
# linearity-corrected values of the full dataset and of the standards table
data$d13C.lindrift <- data$d13C.lin + (C.acc - (lindrift.slopeC * data$row + lindrift.interC))
stds$d13C.lindrift <- stds$d13C.lin + (C.acc - (lindrift.slopeC * stds$row + lindrift.interC))
# Re-subset each standard type with the new lin+drift column
lindriftC.mon<-subset(stds, Identifier1==mon.std)
lindriftC.dis<-subset(stds, Identifier1==dis.std)
lindriftC.dis2<-subset(stds, Identifier1==dis2.std)
# Monitoring standard mean/sd after lin+drift correction (printed)
(lindriftC.mon.mean<-mean(lindriftC.mon$d13C.lindrift))
## [1] -3.112921
(lindriftC.mon.sd<-sd(lindriftC.mon$d13C.lindrift))
## [1] 0.1100928
# Append to the standards summary table (same row order as before)
C.stds.table$lindriftC.mean <- c(lindriftC.mean, lindriftC.mon.mean, mean(lindriftC.dis$d13C.lindrift), mean(lindriftC.dis2$d13C.lindrift))
C.stds.table$lindriftC.sd <- c(lindriftC.sd, lindriftC.mon.sd, sd(lindriftC.dis$d13C.lindrift),sd(lindriftC.dis2$d13C.lindrift))
# Monitoring standard after the lin+drift correction, with mean and
# +/-1/2 sd reference lines
C.mon.drift <- ggplot(lindriftC.mon, aes(x=area44, y=d13C.lindrift)) +
  geom_point(shape=21, fill="red") +
  geom_hline(yintercept = lindriftC.mon.mean, colour="red") +
  geom_hline(yintercept = lindriftC.mon.mean + lindriftC.mon.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = lindriftC.mon.mean - lindriftC.mon.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = lindriftC.mon.mean + 2*lindriftC.mon.sd, colour="red", linetype=3) +
  geom_hline(yintercept = lindriftC.mon.mean - 2*lindriftC.mon.sd, colour="red", linetype=3) +
  annotate("text",
    y = lindriftC.mon.mean +0.01,
    x = min(lindriftC.mon$area44),
    # fix: removed the stray extra sprintf() argument that triggered the
    # "one argument not used by format '%.2f'" warning
    label = paste0("mean: ", sprintf("%.2f", lindriftC.mon.mean), " \U00B1 ", sprintf("%.2f", lindriftC.mon.sd), " \U2030 (1 sd)"),
    size = 4, hjust=0, vjust=0, parse=FALSE, colour="red")
# Monitoring standard (red points) on the mass-vs-area fit of all standards
C.mon.drift.mass<-ggplot (stds, aes(x=mass, y=area44)) +
stat_smooth(method="lm") +
geom_point(data=lindriftC.mon, aes(x=mass, y=area44), shape=21, fill="red", size = 2) +
theme_bw()
# Side-by-side panels
multiplot(C.mon.drift, C.mon.drift.mass, cols=2)
## `geom_smooth()` using formula = 'y ~ x'
Pick the appropriate column of either raw or corrected data to use in the scale regression. If there are no observable effects of linearity or drift (or both), use the raw values. The default is to select the raw values. Then this correction is applied to the whole dataset, using the same choice of previously corrected (or raw) values as used in making the scale correction
# replace the character string here with the correction column you want to use
col_for_scale_C <- "lindrift" # options to substitute in here are: "raw" or "offset" or "drift" or "lin" or "lindrift"
#make new table of measured scale correction values versus the accepted values
col_in_data <- paste0("d13C.", col_for_scale_C)
# safety check -- moved before the column's first use so a bad name fails fast
if (!col_in_data %in% names(data)) stop("this column does not exist in data or stds table: ", col_in_data, call. = FALSE)
# Accepted values for each standard, keyed by standard name
C.accepted <- C.stds.table %>%
  select(std.name, C.acc)
scale.stds <- stds %>%
  #filter(Identifier1 == "NBS18" |Identifier1 == "Merck.UTSA") %>% #| Identifier1 == "CU YULE"
  rename(std.name = Identifier1)
# Join measured values to accepted values and keep the chosen correction
# column under the generic name d13C.
# fix: all_of() replaces the bare external-vector selection that tidyselect
# 1.1.0 deprecated (the warning previously logged here)
scale.corr <- left_join(scale.stds, C.accepted, by = "std.name") %>%
  select(std.name, C.acc, all_of(col_in_data)) %>%
  rename(d13C = all_of(col_in_data))
# regression
# Fit accepted values against measured values; slope/intercept define the
# scale correction applied below
m <- lm(scale.corr$C.acc ~ scale.corr$d13C)
scale.slopeC<-(coef(m)[[2]])
scale.interC<-(coef(m)[[1]])
R2 <- summary(m)$r.squared
# S is the residual standard error of the scale regression; it is carried
# through the output file as the reported d13C precision
S <- summary(m)$sigma
d13C.error.S <- S
# apply correction
# fix: mutate_() + lazyeval was deprecated in dplyr 0.7.0 (the warnings
# previously logged here); the .data[[col]] pronoun is the supported way to
# reference a column by its name held in a string
data <- data %>%
  mutate(d13C.scale = scale.slopeC * .data[[col_in_data]] + scale.interC)
stds <- stds %>%
  mutate(d13C.scale = scale.slopeC * .data[[col_in_data]] + scale.interC)
# Scale-correction plot: standards (blue) with the fitted scale regression,
# samples passing the d18O sd cutoff (red diamonds), and the regression
# equation annotated in the corner
C.scale.all <-
  ggplot(scale.corr, aes(x=d13C, y=C.acc)) +
  geom_smooth(method="lm", color = "blue") +
  # fix: aes_string() was deprecated in ggplot2 3.0.0 (the warning previously
  # logged here); use the .data[[col]] tidy-eval pronoun instead
  geom_point(data=filter(data, d18O.sd<d18O.sd.cutoff), aes(x=.data[[col_in_data]], y=d13C.scale), shape=23, fill="red", size = 2) +
  geom_point(shape=21, fill="blue", size = 2) +
  geom_text(
    x = max(scale.corr$d13C),
    y = max(scale.corr$C.acc),
    label = str_interp("C.acc = $[.2f]{slope} ${var} + $[.2f]{intercept} (R2: $[.4f]{R2}) (S: $[.3f]{S})",
      list(slope = scale.slopeC, intercept = scale.interC, var = col_for_scale_C, R2 = R2, S = S)),
    size = 4, hjust=1.1, vjust=0, colour="blue") +
  labs(x = col_for_scale_C) +
  theme_bw()
C.scale.all
## `geom_smooth()` using formula = 'y ~ x'
Assess effects: Check the monitoring standards after applying the scale correction
# Subset each standard type after the scale correction
scaleC.mon<-subset(stds, Identifier1==mon.std)
scaleC.dis<-subset(stds, Identifier1==dis.std)
scaleC.dis2<-subset(stds, Identifier1==dis2.std)
scaleC.corr<-subset(stds, Identifier1==corr.std)
# Monitoring standard mean/sd after scale correction (printed)
(scaleC.mon.mean<-mean(scaleC.mon$d13C.scale))
## [1] -3.227082
(scaleC.mon.sd<-sd(scaleC.mon$d13C.scale))
## [1] 0.1117494
# Append final scale-corrected stats to the standards summary table
C.stds.table$scaleC.mean <- c(mean(scaleC.corr$d13C.scale), scaleC.mon.mean, mean(scaleC.dis$d13C.scale), mean(scaleC.dis2$d13C.scale))
C.stds.table$scaleC.sd <- c(sd(scaleC.corr$d13C.scale), scaleC.mon.sd, sd(scaleC.dis$d13C.scale), sd(scaleC.dis2$d13C.scale))
In order to calculate the signal for limit of quantitation: eq. 4.7.4 https://chem.libretexts.org/Courses/BethuneCookman_University/B-CU%3A_CH-345_Quantitative_Analysis/Book%3A_Analytical_Chemistry_2.1_(Harvey)/04%3A_Evaluating_Analytical_Data/4.07%3A_Detection_Limits
The ability to detect the analyte with confidence is not the same as the ability to report with confidence its concentration, or to distinguish between its concentration in two samples. For this reason the American Chemical Society’s Committee on Environmental Analytical Chemistry recommends the limit of quantitation, (SA)LOQ:
\((S_A)_{LOQ} = S_{mb} + 10\sigma_{mb}\) Where - \(S_{mb}\) is the signal of the method blank - \(S_{A}\) is the signal of the analyte at the \(LOQ\) level of quantification - \(\sigma_{mb}\) is the standard deviation of the method blank
Signals \(< 3\sigma\) are considered “analyte not detected”. Signals between \(3\sigma\) and \(10\sigma\) are considered detected but not quantifiable “region of detection”. Signals \(>10\sigma\) are considered within the region of quantification. Therefore, \(10\sigma\) is chosen as the limit of quantification (LOQ).
# Select the two method-blank analyses (blank-water exetainers) by their
# Analysis numbers -- update these when reusing this script on another run
method_blanks <- data %>%
filter(Analysis %in% c(26760, 26787))
# print the blank data to a table so we can see
method_blanks %>%
select(file_id, area44) %>%
knitr::kable()
## Adding missing grouping variables: `row`, `Analysis`, `Identifier1`, `mass`
| row | Analysis | Identifier1 | mass | file_id | area44 |
|---|---|---|---|---|---|
| 19 | 26760 | blank_water_1 | 1 | 26760__blank_water_1.dxf | 0.1704247 |
| 46 | 26787 | blank_water_2 | 1 | 26787__blank_water_2.dxf | 0.1846286 |
Calculate signal and standard error of method blanks
# calculate mean signal of method blanks
S_mb <- method_blanks %>% pull(area44) %>% mean()
# standard deviation of the method-blank signal (sigma_mb in the LOQ formula)
sd_mb <- method_blanks %>% pull(area44) %>% sd()
paste0("Method blanks exhibit mean signal (peak area) of ", round(S_mb, 2), " ± ", round(sd_mb, 2), " Vs")
## [1] "Method blanks exhibit mean signal (peak area) of 0.18 ± 0.01 Vs"
Calculate the LOQ as Vs
# (S_A)_LOQ = S_mb + 10*sigma_mb -- the ACS limit-of-quantitation formula
# described in the text above, expressed in peak-area units (Vs)
SA_LOQ <- S_mb + 10 * sd_mb
paste0("The limit of quantification for this run is ", round(SA_LOQ, 2), " Vs")
## [1] "The limit of quantification for this run is 0.28 Vs"
Now we’ll apply the LOQ to our data to determine which samples have quantifiable area - for samples with low DIC
# Flag each analysis as quantifiable (TRUE) when its peak area is at or above
# the LOQ.
# fix: the original case_when had branches only for strictly greater / strictly
# less than SA_LOQ, so an area exactly equal to the LOQ silently became NA;
# a direct comparison covers every case
data <- data %>%
  mutate(LOQ = area44 >= SA_LOQ)
Here, we generate a linear model of peak area area44
versus the mass of \(CaCO_3\) loaded.
We then apply this model to the peak areas of our samples in order to
calculate the equivalent [DIC] in micromolar (uM).
MM_CaCO3 <- 100.0869 # the molecular weight of CaCO3 in g/mol
vol_H2O_sample_ml <- 1 # the volume of the liquid sample in the exetainer (CHANGE IF DIFFERENT VOLUME USED!)
# Remove problematic standards from our set of linear standards (defined earlier as linC)
standards_for_lm <- data %>%
filter(Identifier1 %in% c("IAEA-603", "USGS44", "CU YULE", "blank_water_1", "blank_water_2")) %>% # add ,"IAEA-C2" if you want but it makes the seawater standard 2881uM instead of ~2500um (known is 2050uM)
filter(!Analysis %in% c("26752", "26751", "26801", "26802")) # remove problematic runs (too big, autodilution set in)
# Calculate the moles of CO2 expected when acidifying
# here, mass is the mass of CaCO3 standard weighed out in ug (converted to g by 1e-6)
standards_for_lm <- standards_for_lm %>% mutate(mol_CO2_total_expected = mass * 1e-6 / MM_CaCO3) # add column for total moles CO2 expected
# Calculate [DIC] of initial water sample by dividing total moles CO2 by volume of water
# [DIC] is calculated first as molar (M) --> moles / L (where mL to L is *1e-3)
# We then multiply by 1e6 to convert M to uM:
standards_for_lm <- standards_for_lm %>% mutate(DIC_uM = mol_CO2_total_expected / (vol_H2O_sample_ml * 1e-3) * 1e6)
# Now we calculate a linear model of calculated DIC versus area of our standards:
# y ~ x (dependent ~ independent)
DIClm <- lm(data = standards_for_lm, area44 ~ DIC_uM)
# Extract the method parameters
DIClm_tidy <- broom::tidy(DIClm) # tidy up the linear model using broom: creates a tibble of fit params
DIClm_slope <- DIClm_tidy %>% filter(term == "DIC_uM") %>% pull(estimate)
#DIClm_yint <- DIClm_tidy %>% filter(term == "(Intercept)") %>% pull(estimate)
DIClm_yint <- S_mb # use mean of the method blanks as y intercept
# Coerce the lm to use S_mb (careful!)
# NOTE(review): overwriting coefficients[1] means every later predict()/
# inverse.predict() call uses S_mb as the intercept, and the summary printed
# below shows the overwritten intercept while its standard errors and R2
# still describe the originally fitted model -- confirm this is intended
DIClm$coefficients[1] <- DIClm_yint
summary(DIClm)
##
## Call:
## lm(formula = area44 ~ DIC_uM, data = standards_for_lm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.0787 -2.3227 -0.9275 2.5084 7.8000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1775267 1.3522339 0.131 0.896
## DIC_uM 0.0183731 0.0007265 25.291 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.536 on 28 degrees of freedom
## Multiple R-squared: 0.9581, Adjusted R-squared: 0.9566
## F-statistic: 639.6 on 1 and 28 DF, p-value: < 2.2e-16
#prep summary image for graph:
# Render the stargazer HTML regression table to an image so it can be
# embedded in the calibration plot via annotation_custom()
imgfile <- stargazer(DIClm, type = "html") %>%
as_image()
##
## <table style="text-align:center"><tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td>area44</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">DIC_uM</td><td>0.018<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.001)</td></tr>
## <tr><td style="text-align:left"></td><td></td></tr>
## <tr><td style="text-align:left">Constant</td><td>0.178</td></tr>
## <tr><td style="text-align:left"></td><td>(1.352)</td></tr>
## <tr><td style="text-align:left"></td><td></td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Observations</td><td>30</td></tr>
## <tr><td style="text-align:left">R<sup>2</sup></td><td>0.958</td></tr>
## <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.957</td></tr>
## <tr><td style="text-align:left">Residual Std. Error</td><td>3.536 (df = 28)</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>639.644<sup>***</sup> (df = 1; 28)</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
## save_kable will have the best result with magick installed.
# Read the rendered table back in as a raster grob for plotting
img <- readPNG(imgfile)
g <- rasterGrob(img, interpolate = T, width = 2.5, height = 1)
# Tidy the coefficients again, now reflecting the intercept forced to S_mb
linearMod_tidy_setInt <- tidy(DIClm)
# Calibration plot: standards' expected [DIC] vs measured peak area, with
# the adjusted calibration line (fitted slope, intercept forced to S_mb)
# and the fit-summary tables embedded in the panel
calib_DIC_4 <-
ggplot(standards_for_lm, aes(x = DIC_uM, y = area44)) +
geom_point(aes(color = Identifier1, fill = Identifier1), shape=21, size = 2)+
geom_abline(slope = DIClm_slope, intercept = DIClm_yint, color = "blue") +
#scale_y_continuous(name = latex2exp::TeX("estimated $\\lbrack$$\\Sigma$CO$_2$$\\rbrack$ (µM)"))+
#scale_x_continuous(name = latex2exp::TeX("area44 (Vs)"))+
labs() +
annotation_custom(tableGrob(DIClm_tidy, theme = ttheme_default(base_size = 6,base_colour = "red")), xmin = 0, xmax=3000,ymin = 75)+# Add tidy summary table to a plot
annotation_custom(tableGrob(linearMod_tidy_setInt, theme = ttheme_default(base_size = 6)),xmin = 0, xmax=3000,ymin = 45)+# Add adjusted tidy summary table to a plot
annotation_custom(g, xmin = 4000, xmax = 6000, ymin = 0, ymax = 25)+## Add stargazer table to a plot
theme_bw()
calib_DIC_4
# make interactive plot
# Interactive (plotly) view of expected [DIC] vs peak area for the standards;
# the label aesthetic puts the Analysis number in the hover tooltip
calib_DIC_5 <-
ggplot(standards_for_lm, aes(y=DIC_uM, x=area44, label=Analysis))+
geom_point()+
theme_bw()
calib_DIC_5 %>% ggplotly()
#Calculate the LOQ as DIC
# Invert the calibration to express the LOQ signal as a concentration.
# NOTE(review): in chemCal::inverse.predict, alpha is the significance level,
# so alpha = 0.5 corresponds to a 50% confidence interval, not 95% -- confirm
# the intended tolerance (alpha = 0.05 would give 95%)
SA_LOQ_DIC <- inverse.predict(object = DIClm, newdata = SA_LOQ, alpha = 0.5)$Prediction
SA_LOQ_DIC
## [1] 5.466491
paste0("The limit of [DIC] quantification for this run is ", round(SA_LOQ_DIC, 2), " uM")
## [1] "The limit of [DIC] quantification for this run is 5.47 uM"
Now we apply the calibration to our samples:
# here we use inverse.predict function from chemCal to predict X based on Y
# object: the linear model we generated above
# NOTE(review): chemCal's alpha is a significance level, so alpha = 0.5 yields
# a 50% CI even though the column is named DIC_uM_95CI -- confirm intended.
# NOTE(review): inverse.predict treats a vector of newdata as replicates of
# one sample; this appears to work row-by-row only because `data` is grouped
# per analysis (see the grouping-variables message earlier) -- verify the
# grouping before reusing this chunk on ungrouped data
data <- data %>%
mutate(
# this pulls out the `Prediction` term from inverse.predict list object. Alpha = error tolerance (95% CI)
DIC_uM = as.numeric(inverse.predict(object = DIClm, newdata = area44, alpha = 0.5)$Prediction),
# this pulls out the `Standard Error` term from inverse.predict list object
DIC_uM_95CI = as.numeric(inverse.predict(object = DIClm, newdata = area44, alpha = 0.5)$`Standard Error`),
)
# Sample calibration plot: [DIC] vs peak area, colored by well, with the
# LOQ marked and the inverse-prediction CI shown as error bars
calib_samples <-
  data %>%
  filter(Identifier1 != "IAEA-C2", type == "sample") %>%
  # Derive the well name from the sample identifier
  mutate(well = case_when(
    str_detect(Identifier1, "BA3A") ~ "BA3A",
    str_detect(Identifier1, "BA4A") ~ "BA4A",
    str_detect(Identifier1, "BA1B") ~ "BA1B",
    str_detect(Identifier1, "BA1D") ~ "BA1D" # fix: label was misspelled "BAID"
  )
  ) %>%
  # fix: dropped the unused `label` aesthetic, which had no text geom and
  # triggered the "aesthetics were dropped during statistical transformation"
  # warning from geom_smooth
  ggplot(
    aes(
      x = area44,
      y = DIC_uM,
      color = well
    )
  ) +
  geom_vline(xintercept = SA_LOQ, color = "red") +
  annotate(geom = "text", label = "LOQ", x = 3, y = 4200, color = "red") +
  geom_smooth(
    method = "lm",
    formula = "y~x"
  ) +
  geom_point() +
  geom_errorbar(aes(ymin = DIC_uM - DIC_uM_95CI, ymax = DIC_uM + DIC_uM_95CI)) +
  labs(
    title = "[DIC] (uM) of samples"
  ) +
  theme_bw()
calib_samples
# here we use inverse.predict function from chemCal to predict X based on Y
# object: the linear model we generated above
# NOTE(review): this recomputes the same DIC_uM / DIC_uM_95CI columns as the
# previous chunk (harmless but redundant) and then adds Volume_ml
data <- data %>%
mutate(
# this pulls out the `Prediction` term from inverse.predict list object. Alpha = error tolerance (95% CI)
DIC_uM = as.numeric(inverse.predict(object = DIClm, newdata = area44, alpha = 0.5)$Prediction),
# this pulls out the `Standard Error` term from inverse.predict list object
DIC_uM_95CI = as.numeric(inverse.predict(object = DIClm, newdata = area44, alpha = 0.5)$`Standard Error`),
# for samples, the `mass` column apparently records the water volume (ml)
# analyzed rather than a CaCO3 mass -- TODO confirm against the run sheet
) %>% mutate(Volume_ml = case_when(type == "sample" ~ mass, TRUE ~ NA))
data_sample_green <- data %>%
filter( type == "sample")
# Plot [DIC] vs area before the volume adjustment, with the seawater CRM
# highlighted in green for comparison against its known concentration
data %>%
filter( type == "sample") %>%
ggplot(
aes(
x = area44,
y = DIC_uM
)
) +
geom_vline(xintercept = SA_LOQ, color = "red") +
annotate(geom = "text", label = "LOQ", x = 3, y = 4200, color = "red") +
geom_smooth(
method = "lm",
formula = "y~x"
) +
geom_point() +
geom_text(
aes(label=Identifier1),
nudge_x = 0.25, nudge_y = 0.25,
check_overlap = T
)+
geom_errorbar(aes(ymin = DIC_uM - DIC_uM_95CI, ymax = DIC_uM + DIC_uM_95CI)) +
geom_point(data = data_sample_green %>% filter(Identifier1 == "seawater standard"), color = "green")+ geom_text(data = data_sample_green %>% filter(Identifier1 == "seawater standard"),
aes(label=Identifier1),color = "green",
nudge_x = 0.25, nudge_y = 220,
check_overlap = T)+
labs(
title = "[DIC] (uM) of samples not yet adjusted for sample volumes different than 1 ml"
) +
theme_bw()
# Adjust sample [DIC] for the actual water volume analyzed (standards are
# left unchanged; the calibration assumed 1 ml)
data <- data %>%
mutate( DIC_uM = ifelse(type == "sample",(DIC_uM/Volume_ml),DIC_uM)) #this adjusts the sample [DIC] values according to how much sample volume was used (so if 1 ml was volume no change, but if 0.5 ml was volume result is twice as big etc)
data_sample <- data %>%
filter( type == "sample")
# Final sample plot: volume-adjusted [DIC] vs peak area, colored by the
# volume analyzed, with the seawater CRM highlighted in green
calib6 <- ggplot(data_sample,
aes(
x = area44,
y = DIC_uM , color=Volume_ml)) +
geom_vline(xintercept = SA_LOQ, color = "red") +
annotate(geom = "text", label = "LOQ", x = 3, y = 4200, color = "red") +
geom_point() +
geom_text(
aes(label=Identifier1),
nudge_x = 0.25, nudge_y = 220,
check_overlap = T
)+
geom_errorbar(aes(ymin = DIC_uM - DIC_uM_95CI, ymax = DIC_uM + DIC_uM_95CI)) +
geom_point(data = data_sample %>% filter(Identifier1 == "seawater standard"), color = "green")+ geom_text(data = data_sample %>% filter(Identifier1 == "seawater standard"),
aes(label=Identifier1),color = "green",
nudge_x = 0.25, nudge_y = 220,
check_overlap = T)+
labs(
title = "[DIC] (uM) of samples adjusted for sample volumes different than 1 ml") +
theme_bw()
# Render interactively with plotly
calib6 %>% ggplotly()
# Check the seawater certified reference material, run as "seawater standard"
# (known [DIC] = 2049.43 uM for Dickson CRM batch 157; see run notes)
testCO2Ref <- data %>%
  filter( Identifier1 == "seawater standard")
mean_seawater_standard <- mean(testCO2Ref$DIC_uM)
mean_seawater_standard_d13C <- mean(testCO2Ref$d13C.scale)
paste0("The mean [DIC] of the seawater standards is ", round(mean_seawater_standard, 2), " uM")
## [1] "The mean [DIC] of the seawater standards is 2566.49 uM"
# fix: d13C is reported in permil, not uM -- corrected the unit in the message
paste0("The mean d13C of the seawater standards is ", round(mean_seawater_standard_d13C, 2), " \U2030")
#label as do not use
#label as do not use
# Template lines for flagging analyses: replace xxxx with an Analysis number
# and uncomment; none were needed for this run (see run notes in header)
#data[data$Analysis == xxxx, "Do_not_use"] <- "yes"
#data[data$Analysis == xxxx, "Do_not_use"] <- "yes"
# label those samples with reason they were culled, in main dataset
#data[data$Analysis == xxxx, "Why_culled"] <- "large between peak standard deviations (st)"
#data[data$Analysis == xxxx, "Why_culled"] <- "high between peak stdev; big half peak could affect data?) (sa)"
#data[data$Analysis == xxxx, "Why_culled"] <- "too small (sa)"
#data[data$Analysis == xxxx, "Why_culled"] <- "too small; sd ok but smaller than linearity range (sa)"
# label those samples with reason they were culled, in culled dataset
#culled.data[culled.data$Analysis == xxxx, "Why_culled"] <- "large between peak standard deviations (st)"
#culled.data[culled.data$Analysis == xxxx, "Why_culled"] <- "high between peak stdev; big half peak could affect data?) (sa)"
#culled.data[culled.data$Analysis == xxxx, "Why_culled"] <- "too small (sa)"
#culled.data[culled.data$Analysis == xxxx, "Why_culled"] <- "too small; sd ok but smaller than linearity range (sa)"
#add column that tells values used in the scale correction
data$d13C_scale_input <- col_for_scale_C
#data$d18O_scale_input <- col_for_scale_O
# Record the run identifier on every row for the combined output file
data$Run <- Run
# mutate() with a bare variable adds it as a constant column named d13C.error.S
data <- data %>% mutate(d13C.error.S)#d18O.error.S ,
# reorder columns for more readable output in output file
# fix: removed a stray double comma (empty argument) after d13C.sd
data <- data %>%
  select(Run, row, file_id, Analysis, Identifier1, mass, type, num.peaks, area44, area44.sd, inv.area44, PercentCO3, d13C.raw, d13C.sd, d18O.raw.SMOW, d18O.sd, d13C.offset, d13C.drift, d13C.lin, d13C.lindrift, d13C.scale, d13C.error.S, d13C_scale_input,
    #d18O.raw.SMOW, d18O.sd, d18O.raw, d18O.offset, d18O.drift, d18O.lin, d18O.lindrift, d18O.scale,d18O.error.S, d18O_scale_input,
    Do_not_use, Why_culled, DIC_uM, DIC_uM_95CI)
#creates subset of just samples and fully corrected data.
# (drops all four standard types: correction, monitoring, and both dissolved)
keydata <- data %>% ungroup() %>% filter(!(Identifier1 %in% c(corr.std, mon.std, dis.std, dis2.std))) %>% select(Run,Analysis, Identifier1, mass, area44,DIC_uM,DIC_uM_95CI,d13C.scale,d13C.error.S, PercentCO3, Do_not_use, Why_culled,d18O.raw.SMOW, d18O.sd)
# Helper: add a worksheet named `sheet` to openxlsx workbook `wb`, write
# `data` into it, and hand the workbook back so calls can be chained.
add_ws_with_data <- function(wb, sheet, data) {
  addWorksheet(wb, sheet)
  writeData(wb, sheet = sheet, data)
  wb
}
# Assemble the multi-sheet Excel output: key results first, then every
# intermediate correction table for traceability
wb <- createWorkbook("data")
wb <- add_ws_with_data(wb, "key data", keydata)
wb <- add_ws_with_data(wb, "all data corrected", data)
wb <- add_ws_with_data(wb, "offsetC", offsetC)
wb <- add_ws_with_data(wb, "driftC", driftC)
wb <- add_ws_with_data(wb, "linC", linC)
wb <- add_ws_with_data(wb, "lindriftC", lindriftC)
wb <- add_ws_with_data(wb, "C Stds means", C.stds.table)
wb <- add_ws_with_data(wb, "all stds used", stds)
wb <- add_ws_with_data(wb, "culled data", culled.data)
# Write <session>_corrected_data.xlsx, overwriting any previous version
saveWorkbook(wb, paste0(session, "_corrected_data.xlsx"), overwrite = TRUE)